# Load the data set from CSV; the column-type message is switched off.
data_Sleep_Fatigue_AlcoholUse <- read_csv(
  file = "df_Sleep_Fatigue_AlcoholUse.csv",
  show_col_types = FALSE
)
## New names:
## • `` -> `...1`
# Per-column summary statistics (including NA counts) of the raw data.
data_Sleep_Fatigue_AlcoholUse |> summary()
## ...1 X ID TSC
## Min. : 1.00 Min. : 1.00 Min. :101.0 Min. :1.850
## 1st Qu.: 43.75 1st Qu.: 43.75 1st Qu.:143.8 1st Qu.:2.400
## Median : 86.50 Median : 86.50 Median :186.5 Median :2.850
## Mean : 86.50 Mean : 86.50 Mean :186.5 Mean :2.831
## 3rd Qu.:129.25 3rd Qu.:129.25 3rd Qu.:229.2 3rd Qu.:3.150
## Max. :172.00 Max. :172.00 Max. :272.0 Max. :4.310
## NA's :10
## FSS CIS_Fatigue_severity CIS_Concentration CIS_Motivation
## Min. :1.00 Min. :1.500 Min. :1.600 Min. :1.00
## 1st Qu.:3.00 1st Qu.:2.880 1st Qu.:3.000 1st Qu.:2.50
## Median :3.67 Median :3.815 Median :4.000 Median :3.00
## Mean :3.66 Mean :3.856 Mean :3.926 Mean :3.21
## 3rd Qu.:4.22 3rd Qu.:4.620 3rd Qu.:4.600 3rd Qu.:3.75
## Max. :7.00 Max. :7.000 Max. :6.800 Max. :6.00
## NA's :13 NA's :10 NA's :10 NA's :10
## CIS_Activity PSQI_component1 PSQI_component2 PSQI_component3
## Min. :1.000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:2.330 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:0.0000
## Median :3.000 Median :1.000 Median :1.000 Median :0.0000
## Mean :3.259 Mean :1.107 Mean :1.261 Mean :0.5101
## 3rd Qu.:4.330 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :6.330 Max. :2.000 Max. :3.000 Max. :3.0000
## NA's :10 NA's :23 NA's :11 NA's :23
## PSQI_component4 PSQI_component5 PSQI_component6 PSQI_component7
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:1.000
## Median :0.0000 Median :1.000 Median :0.0000 Median :1.000
## Mean :0.4362 Mean :1.112 Mean :0.1544 Mean :1.422
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:2.000
## Max. :3.0000 Max. :2.000 Max. :3.0000 Max. :3.000
## NA's :23 NA's :11 NA's :23 NA's :11
## PSQI_GlobalScore MCTQ_MSFsc MCTQ_SocialJetlag AUDIT_Score
## Min. : 1.000 Min. :2.440 Min. :0.000 Min. : 0.000
## 1st Qu.: 4.000 1st Qu.:4.407 1st Qu.:0.980 1st Qu.: 5.000
## Median : 6.000 Median :5.105 Median :1.520 Median : 9.000
## Mean : 5.906 Mean :5.113 Mean :1.555 Mean : 8.886
## 3rd Qu.: 7.000 3rd Qu.:5.870 3rd Qu.:2.010 3rd Qu.:12.000
## Max. :15.000 Max. :8.080 Max. :4.500 Max. :25.000
## NA's :23 NA's :32 NA's :32 NA's :23
# Keep the five variables of interest under human-readable column names.
# check.names = FALSE stops data.frame() from rewriting names that contain
# spaces or parentheses, so they do not have to be assigned a second time.
filtered_data <- with(
  data_Sleep_Fatigue_AlcoholUse,
  data.frame(
    "Trait self-control" = TSC,
    "Concentration (CIS)" = CIS_Concentration,
    "Disturbances in sleep quality (PSQI)" = PSQI_GlobalScore,
    "Chronotype (MCTQ)" = MCTQ_MSFsc,
    "Alcohol usage (AUDIT)" = AUDIT_Score,
    check.names = FALSE
  )
)
# Descriptive statistics per variable, reduced to the columns shown in Table 1.
shown_columns <- c("n", "mean", "sd", "min", "max")
description_filtered_data <- describe(filtered_data, fast = TRUE)[shown_columns]
# Wrap each header in asterisks (markdown italics).
colnames(description_filtered_data) <- paste0("*", shown_columns, "*")
kable_styling(
  kable_classic(
    kable(
      description_filtered_data,
      digits = 2,
      caption = "Table 1. Descriptive Statistics"
    )
  ),
  full_width = FALSE,
  font_size = 20
)
| | n | mean | sd | min | max |
|---|---|---|---|---|---|
| Trait self-control | 162 | 2.83 | 0.52 | 1.85 | 4.31 |
| Concentration (CIS) | 162 | 3.93 | 1.12 | 1.60 | 6.80 |
| Disturbances in sleep quality (PSQI) | 149 | 5.91 | 2.43 | 1.00 | 15.00 |
| Chronotype (MCTQ) | 140 | 5.11 | 1.08 | 2.44 | 8.08 |
| Alcohol usage (AUDIT) | 149 | 8.89 | 5.23 | 0.00 | 25.00 |
#' Draw a 10-bin histogram of one column of a data frame.
#'
#' @param x Name of the column to plot, given as a string.
#' @param data Data frame that contains column `x`.
#' @param label Text used for the x-axis label.
#' @param fill_color Fill colour of the bars.
#' @param min_value,max_value Lower and upper end of the visible x-axis range.
#' @return A ggplot object.
create_hist <- function(x, data, label, fill_color, min_value, max_value) {
  ggplot(data, aes(x = .data[[x]])) +
    geom_histogram(fill = fill_color, bins = 10) +
    # Zoom with coord_cartesian() rather than xlim(): xlim() sets scale limits,
    # and bars whose edges reach past those limits are dropped from the plot
    # ("Removed 2 rows containing missing values"), hiding the outermost bins.
    coord_cartesian(xlim = c(min_value, max_value)) +
    xlab(label) +
    ylab("Frequency")
}
# Keep only rows that are complete on all selected variables.
filtered_data <- na.omit(filtered_data)

# One histogram per variable, each spanning that variable's observed range.
# lapply() builds the list in one go instead of growing it inside a loop.
plots_step_2 <- lapply(colnames(filtered_data), function(column) {
  limits <- range(filtered_data[[column]])
  create_hist(column, filtered_data, column, "red", limits[1], limits[2])
})

# Arrange the histograms in a single multi-panel figure.
figure1 <- ggarrange(plotlist = plots_step_2)
## Warning: Removed 2 rows containing missing values (`geom_bar()`).
## Removed 2 rows containing missing values (`geom_bar()`).
## Removed 2 rows containing missing values (`geom_bar()`).
## Removed 2 rows containing missing values (`geom_bar()`).
## Removed 2 rows containing missing values (`geom_bar()`).
# Render the multi-panel histogram figure.
print(x = figure1)
# Outlier bounds for trait self-control: mean +/- 2 standard deviations of the
# complete cases. The lower bound is below the observed minimum, so it is used
# as the left axis limit of the histogram.
# The mean is stored as `tsc_mean` (not `mean`) so base::mean() is not shadowed.
tsc_values <- filtered_data[[1]]
tsc_mean <- mean(tsc_values)
sd_2 <- sd(tsc_values) * 2
lowerbound <- tsc_mean - sd_2
upperbound <- tsc_mean + sd_2

# Figure 2: histogram with a vertical line at the mean.
figure2 <- create_hist(
  "Trait self-control", filtered_data, "Trait self-control", "red",
  lowerbound, max(tsc_values)
) +
  geom_vline(xintercept = tsc_mean, color = "black") +
  ggtitle("Figure 2")

# Figure 3: same plot with the rounded mean written next to the line.
mean_rounded <- round(tsc_mean, 2)
figure3 <- figure2 +
  annotate("text", label = paste("mean = ", mean_rounded), x = 3.10, y = 21) +
  ggtitle("Figure 3")
print(figure3)
## Warning: Removed 2 rows containing missing values (`geom_bar()`).
# Rows of the full data set whose TSC lies outside [lowerbound, upperbound].
outliers <- filter(
  data_Sleep_Fatigue_AlcoholUse,
  TSC < lowerbound | TSC > upperbound
)
ID_outliers <- outliers$ID

# Report the outlier IDs, or the observed range when there are none.
if (length(ID_outliers) >= 1) {
  print(paste("outlier:", ID_outliers))
} else {
  print("There are no outliers")
  print(paste("The maximum is", max(filtered_data[[1]])))
  print(paste("The minimum is", min(filtered_data[[1]])))
}
## [1] "outlier: 142" "outlier: 217"
# Vertical lines marking the upper and lower outlier bounds.
bound_lines <- list(
  geom_vline(xintercept = upperbound),
  geom_vline(xintercept = lowerbound)
)
figure4 <- figure3 + bound_lines + ggtitle("Figure 5")

# Convert the static plot to an interactive plotly widget and display it.
figure5 <- ggplotly(figure4)
figure5
# Boxplot of trait self-control over the full data set. The data frame is
# passed to ggplot() and the column is mapped by name, rather than using
# `data$column` inside aes().
# NOTE(review): the y-axis of a single boxplot does not show a frequency; the
# label is kept unchanged here - confirm whether it should be removed.
figure6 <- ggplot(data_Sleep_Fatigue_AlcoholUse, aes(x = TSC)) +
  geom_boxplot() +
  xlab("Trait self-control") +
  ylab("frequency") +
  ggtitle("Figure 6")
print(figure6)
## Warning: Removed 10 rows containing non-finite values (`stat_boxplot()`).
The conclusion about outliers changes with the boxplot: it marks only a single outlier, whereas the criterion of lying more than two standard deviations from the mean identified two.
# Correlation matrix of the complete cases, drawn as a labelled heatmap with
# the variables reordered by hierarchical clustering (hc.order = TRUE).
corr <- filtered_data |> cor()
figure7 <- corr |> ggcorrplot(hc.order = TRUE, lab = TRUE)
print(figure7)